###################################
# REGRESSIONS
###################################
# questions to bowydenbraber@gmail.com

library(spdep)

#settings
outcome<-#set outcome

covariates<-c("mine_presabs_before2000",
                 "mun.state",
                 "forest_mean_scaled",
                 "tt_standard1_scaled",
                 "d.pop.00_scaled",
                 "area_scaled",
                 "hhh.inc.avg.00_infl_scaled",
                 "p.hhh.lit.00_scaled",
                 "gini.00_scaled",
                 "p.hh.bs.poor.00_scaled",
                 "slope_scaled",
                 "elevation_scaled",
                 "flooded_scaled")

covariates_mining<-c(covariates,"propsettled_scaled")



#read matched data
full.data<-read.csv()

#read spatial data
cuarea<-readShapeSpatial("CU00M_latlon.shp")
cuarea2 = as(cuarea, "data.frame")
coordinates(cuarea2) = ~Longitude + Latitude 
proj4string(cuarea2)<-CRS("+proj=longlat +datum=WGS84")

#for spatial autocorrelation
full.data_sp<-merge(cuarea2, full.data, by.x="CU_ID_50M", by.y="CU_ID_50M",all.x=FALSE, all.y=TRUE)
coords<-coordinates(full.data_sp)
IDs<-row.names(as(full.data_sp, "data.frame"))
#define distance for spatial autocorrelation
poverty_kn1 <- knn2nb(knearneigh(coords, k = 1), row.names = IDs)
dist <- unlist(nbdists(poverty_kn1, coords))
distsum <- summary(dist)

#define neighbors up to certain distance (third quartile of the distance distribution)
ct_kd1 <- dnearneigh(coords, d1 = 0, d2 = distsum[5], row.names = IDs)
SIDdlist = nbdists(ct_kd1, coords)
SIDdlist = lapply(SIDdlist, function(x) 1/x)

CT_spatial = nb2listw(poverty_kd1, glist=SIDdlist, zero.policy=TRUE)

#ensure that it's a factor
full.data$mun.state<-as.factor(full.data$mun.state)

#limit dataframe
full.data2<-full.data[c(covariates,"tr",outcome,"weights","Latitude.x","Longitude.x")]

#check for NAs
indx <- apply(full.data2, 2, function(x) any(is.na(x) | is.infinite(x)))

#regression
lm.out <- lm(as.formula(paste(outcome,"~tr+",paste(c(covariates),collapse="+"))), data=full.data2,weights=weights)
summary(lm.out)

#heteroskedasticity
lm.out_vcov<-lmtest::coeftest(lm.out, vcov = sandwich::vcovHC(lm.out,type="HC1"))

#check for spatial autocorrelation
moran.ici_pscores<-lm.morantest(lm.out, CT_spatial, alternative="two.sided",zero.policy = TRUE)

